# -*- coding: utf-8 -*-
import random
import urllib

import re
import requests
import time
from bs4 import BeautifulSoup

# Browser-like User-Agent string sent with the page requests below.
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5)'
# Header dict passed to every session.get() call in this file.
headers = {'User-Agent': user_agent}
# One shared requests session, reused for all list-page and story-page fetches.
session = requests.session()


def downAFile(fileName, fileUrl, downCallBack,
              downDir='D:\\xfmovie\\test\\101个儿童英语故事'):
    """Download ``fileUrl`` and store it as ``<downDir>/<fileName>.mp3``.

    Args:
        fileName: Base name (without extension) of the file to create.
        fileUrl: URL of the resource to fetch.
        downCallBack: Progress hook handed to ``urlretrieve``; it is called
            with ``(blocks_so_far, block_size, total_size)``. May be ``None``.
        downDir: Directory that receives the file. Defaults to the folder
            the script has always used; it is created when missing.
    """
    # ``import urllib`` alone does not guarantee that the ``request``
    # submodule is loaded, so import it explicitly where it is used.
    import os
    import urllib.request

    # urlretrieve fails with FileNotFoundError if the folder is absent.
    os.makedirs(downDir, exist_ok=True)
    downPath = os.path.join(downDir, '%s.mp3' % (fileName))
    urllib.request.urlretrieve(fileUrl, downPath, downCallBack)
    print(fileName , "下载完成")

def downCallBack(downedCount, countSize, allSize):
    """Progress hook for ``urlretrieve``: compute the download percentage.

    Args:
        downedCount: Number of blocks transferred so far.
        countSize: Size of one block in bytes.
        allSize: Total size in bytes; ``urlretrieve`` passes ``-1`` when the
            server does not report a size.

    Returns:
        The completed percentage capped at 100, or ``None`` when the total
        size is unknown or zero (no percentage can be computed).
    """
    # A zero total would divide by zero and -1 would give a negative value.
    if allSize <= 0:
        return None
    per = 100.0 * downedCount * countSize / allSize
    # The last block usually overshoots the real size, so cap the result.
    return min(per, 100.0)


def getAsleep():
    """Pause for a random duration between 0 and 2 seconds and print it."""
    pauseSeconds = random.uniform(0, 2)
    print("休眠：", pauseSeconds)
    time.sleep(pauseSeconds)

def _cleanStoryTitle(rawTitle):
    """Turn the page heading text into a short name usable as a file name.

    Colons are replaced by spaces, only the last space-separated token is
    kept, and whitespace plus ASCII/full-width punctuation is stripped.
    """
    lastToken = rawTitle.replace(':', ' ').split(" ")[-1]
    # Raw string: the pattern holds regex escapes (\s, \.) that are invalid
    # escape sequences in an ordinary string literal.
    return re.sub(r"[\s+\.\!\/_,$%^*(+\"\']+|[+——！，。?|？、~@#￥%……&*（）]+", "", lastToken)


def _extractMp3Path(scriptText):
    """Cut the mp3 path out of the inline player script.

    The path starts 14 characters after the beginning of ``thunder_url`` and
    runs through the first following ``mp3``.

    Raises:
        ValueError: If the script text is missing or lacks either marker.
    """
    if scriptText is None:
        raise ValueError("player script has no inline text")
    start = scriptText.find("thunder_url")
    if start == -1:
        raise ValueError("'thunder_url' not found in player script")
    # Search only after the marker so an earlier "mp3" cannot be picked up.
    end = scriptText.find("mp3", start + 14)
    if end == -1:
        raise ValueError("no 'mp3' found after 'thunder_url' in player script")
    return scriptText[start + 14:end + 3]


def getMp3AndLrc(url):
    """Download one story's audio and save its text lines as a ``.lrc`` file.

    The story page at ``url`` is fetched, the title is read from the
    ``h1#nrtitle`` heading, the mp3 path is taken from the second ``<script>``
    inside the ``margin-bottom:4px;`` div, and every ``<div>`` inside the
    ``info-qh`` container is appended as one line to the lyric file.

    Args:
        url: Address of the story page.

    Raises:
        ValueError: If the mp3 path cannot be located in the player script.
    """
    page = session.get(url, headers=headers)
    # Force UTF-8 decoding so the Chinese text is not garbled.
    page.encoding = 'utf-8'
    soup = BeautifulSoup(page.text, 'lxml')

    heading = soup.findAll(name='h1', attrs={"id": "nrtitle"})
    name = _cleanStoryTitle(heading[0].string)

    playerDiv = soup.find(name='div', attrs={"style": "margin-bottom:4px;"})
    scripts = playerDiv.findAll(name='script')
    mp3Url = "http://k6.kekenet.com/%s" % (_extractMp3Path(scripts[1].string))

    # Fetch the audio file.
    downAFile(name, mp3Url, downCallBack)

    divData = soup.find(name='div', attrs={"class": "info-qh"})
    lines = divData.findAll(name='div')
    downPath = 'D:\\xfmovie\\test\\101个儿童英语故事\\%s.lrc' % (name)
    # Explicit UTF-8 avoids UnicodeEncodeError under a non-UTF-8 locale;
    # the with-block closes the file even if a write fails.
    with open(downPath, 'a', encoding='utf-8') as lrcFile:
        for line in lines:
            text = line.string
            # .string is None for divs with nested markup; skip them rather
            # than writing the literal word "None" into the lyrics.
            if text is None:
                continue
            print(text)
            lrcFile.write(str(text))
            lrcFile.write("\n")




def getAndSaveUrl(url):
    """Process every story linked from one list page.

    The page at ``url`` is fetched, and for each ``<li>`` inside a
    ``<ul id="menu-list">`` the ``href`` of its ``<h2><a>`` link is passed to
    ``getMp3AndLrc``.

    Args:
        url: Address of the list page.
    """
    page = session.get(url, headers=headers)
    soup = BeautifulSoup(page.text, 'lxml')
    for menu in soup.findAll('ul', id='menu-list'):
        for item in menu.findAll('li'):
            # find() returns None when the tag is absent; skip list items
            # that carry no story link instead of aborting the whole crawl.
            h2 = item.find('h2')
            link = h2.find('a') if h2 is not None else None
            path = link.get('href') if link is not None else None
            if not path:
                continue
            getMp3AndLrc(path)



# List pages to crawl, in the order they are visited. A tuple is used instead
# of a set literal so the order is deterministic, and the name no longer
# shadows the builtin ``set``.
_LIST_PAGE_BASE = "http://www.kekenet.com/child/16125/"
LIST_PAGE_URLS = tuple(
    "%sList_%d.shtml" % (_LIST_PAGE_BASE, pageNo) for pageNo in range(1, 10)
) + (_LIST_PAGE_BASE,)


def main():
    """Crawl every list page, pausing a random interval after each one."""
    for pageUrl in LIST_PAGE_URLS:
        getAndSaveUrl(pageUrl)
        getAsleep()


# Only start crawling when run as a script, not when the module is imported.
if __name__ == "__main__":
    main()











